//  Date:         	08/08/2018
//  task:         	water analysis
//  project:      	World Development

clear all
set more off
version 13.1
graph drop _all
capture log close
set scheme plotplainblind, permanently

do make_index_gr.do
log using Analysis_water_endline.log,replace text

//  #1
//  load data
use ubridge_water.dta, clear

//  #2
//  clean data
	
* first drop variables that have all missing values
findname, all(missing(@))

* second, drop clearly unnecessary variables	
drop matchedmeeting

move HC_dist num_meetings
	replace HC_dist=HC_dist/1000
	lab var HC_dist "Distance to health center (km)" 
	
move Arua_dist num_meetings 
	replace Arua_dist=Arua_dist/1000
	lab var Arua_dist "Distance to Arua (km)" 

	gen ldist=log(Arua_dist)
	lab var ldist "Log distance to Arua" 
	su Arua_dist ldist, de

renvars *_sc,   subst(_sc )
drop age_lc1  
renvars *_lc1,  subst(_lc1 ) 	

ren sc_frac fractionalization
	lab var fractionalization "Religious fractionalization"
ren sc_pol polarization

gen lpop=log(adult_pop)
lab var lpop "Log adult population"

******************************
* Define control
******************************

foreach y in lpop age poverty_census lugbara_share HHI_Ethnicity polarization HHI_Religion literacy education employed_share Nonagriculture_share Arua_dist{
	 egen `y'_median=median(`y')
	 gen `y'_miss = `y'==.
	 tab `y'_miss
	 replace `y' = `y'_median if `y'==.
	 center `y'  , standardize
	 drop `y'_median
}

foreach cov in c_lpop c_age c_poverty_census c_lugbara_share c_polarization c_HHI_Religion c_literacy c_education c_employed_share c_Nonagriculture_share c_Arua_dist{
	reg treat `cov'
	}
	
gl controls c.c_age##treat c.c_lugbara_share##treat c.c_literacy##treat c.c_Arua_dist##treat 
	su $controls

************************************
* First round of water admin data

* Model 1. No cov adjustment (except for baseline)
* Model 2. Add demeaned covariate, interacted with treatment indicator 
************************************

// assign value zero to those not mentioned in the admin data 
// standardize variables 

foreach var in parts_services_14_16 parts_services_13_14 village_requests_14_16 village_requests_13_14{
	quietly summarize `var' if treat==0
	local `var'_mean= r(mean)
	local `var'_sd= r(sd) 
	gen c_`var' = (`var'-``var'_mean')/``var'_sd'
	qui egen mean_std_`var'=mean(c_`var') if treat==1
	replace c_`var' = mean_std_`var' if treat==1 & c_`var'==.
	replace c_`var' = 0 if treat==0 & c_`var'==.
	}
	
tempfile parts_services1 village_requests1 c_parts_services1 c_village_requests1
tempfile parts_services2 village_requests2 c_parts_services2 c_village_requests2
	
foreach y in parts_services village_requests c_parts_services c_village_requests {
	reg `y'_14_16 i.treat `y'_13_14, cl(cluster_id)
		estadd local control "No"
		est store `y'1
		margins, at(treat=(0 1)) 
		test _b[1.treat]=0
		local sign_trt = sign(_b[1.treat])
		display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_trt'*sqrt(r(F)))
		est restore `y'1
		margins , dydx(treat) post
	    parmest, label bmat(r(b)) vmat(r(V)) saving(``y'1')
		
	reg `y'_14_16 i.treat `y'_13_14 $controls, cl(cluster_id)
		estadd local control "Yes"
		est store `y'2
		margins, at(treat=(0 1))
		test _b[1.treat]=0
		local sign_trt = sign(_b[1.treat])
		display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_trt'*sqrt(r(F))) 
		est restore `y'2
		margins , dydx(treat) post
	    parmest, label bmat(r(b)) vmat(r(V)) saving(``y'2')		
	}

*************************************************************
* Table 11 in Supplementary Information 
*************************************************************
// Table parts and services and village requests:
esttab c_parts_services1 c_parts_services2 c_village_requests1 c_village_requests2

* in non-standardized
esttab parts_services1 parts_services2 village_requests1 village_requests2

// estimation to plot in R
preserve
use `c_parts_services1', clear 
	append using `c_parts_services2' 
	append using `c_village_requests1' 
	append using `c_village_requests2' 

	keep if parm=="1.treat"
	
	gen n=_n
	gen CovAdjust="No"
	replace CovAdjust="Yes" if n==2 | n==4
	
	gen var="Parts & services"
	replace var="Village requests" if n==3 | n==4   
	drop parm label
	
saveold water_kling, replace	
restore

*************************************************************
* Multi-level models
*************************************************************
preserve
ren c_parts_services_14_16 c_parts_services2
ren c_parts_services_13_14 c_parts_services1
ren c_village_requests_14_16 c_village_requests2
ren c_village_requests_13_14 c_village_requests1
ren parts_services_14_16 parts_services2
ren parts_services_13_14 parts_services1
ren village_requests_14_16 village_requests2
ren village_requests_13_14 village_requests1
local covs c_age c_lugbara_share c_literacy c_Arua_dist

keep `covs' parts_services2 c_parts_services2 parts_services1 c_parts_services1 ///
village_requests1 village_requests2  c_village_requests1 c_village_requests2 treat DataSetID cluster_id
reshape long c_parts_services c_village_requests parts_services village_requests, i(DataSetID) j(wave)

tempfile parts_services1 parts_services2 parts_services3 village_requests1 village_requests2 
tempfile c_parts_services1 c_parts_services2 c_parts_services3 c_village_requests1 c_village_requests2 

foreach y in parts_services village_requests c_parts_services c_village_requests {
	mixed `y' i.treat || DataSetID:, cl(cluster_id)
	capture est sto `y'_m1	
	margins , dydx(treat) post
	parmest, label bmat(r(b)) vmat(r(V)) saving(``y'1')
	
	mixed `y' i.treat $controls || DataSetID:, cl(cluster_id)
	capture est sto `y'_m2
	margins , dydx(treat) post
	parmest, label bmat(r(b)) vmat(r(V)) saving(``y'2')	
	}
	
restore	

preserve
use `c_parts_services1', clear 
	append using `c_parts_services2' 
	append using `c_village_requests1' 
	append using `c_village_requests2' 

	keep if parm=="1.treat"
	
	gen n=_n
	gen CovAdjust="No"
	replace CovAdjust="Yes" if n==2 | n==4
	
	gen var="Parts & services"
	replace var="Village requests" if n==3 | n==4   
	drop parm label
	
saveold water_multilevel, replace	
restore	

*************************************************************
// graph raw data
*************************************************************

preserve
	gen service2 =0
	replace service2=1 if  parts_services_14_16>0 & parts_services_14_16!=.
	gen service1 =0
	replace service1=1 if  parts_services_13_14>0 & parts_services_13_14!=.
	
	gen requests2=0
	replace requests2=1 if  village_requests_14_16>0 & village_requests_14_16!=.
	gen requests1 =0
	replace requests1=1 if  village_requests_13_14>0 & village_requests_13_14!=.

	keep service1 service2 requests1 requests2 treat
	collapse (sum) service1 service2 requests1 requests2  , by(treat)
	reshape long service requests, i(treat) j(period)
	
	* label vars
lab var treat "Treatment"
lab define treat 0 "Control" 1 "Treatment"
lab value treat treat

lab define period 1 "Baseline" 2 "Endline", modify
lab value period period

lab var service "Parts and services"
lab var requests "Village requests"

**********************************************************************	
* FIGURE 13 in Supplementary Information
**********************************************************************

graph bar service, over(treat) over(period)  ytitle(Number of villages) title(Parts and services) name(service) 
graph bar request, over(treat) over(period)  ytitle(Number of villages) title(Village requests) name(request) 

	graph combine service request, title(Water services)

restore

**********************************************************************	
* relation between distance and messages at the village-level
**********************************************************************
ren RelevantMess_ct topicTotal
renvars *_n,   subst(_n )
	
foreach var in topicEducation topicHealth topicWater{
	replace `var'=0 if `var'==.
	}

set scheme plottig

**********************************************************************	
* FIGURE 19 in Supplementary Information
**********************************************************************

foreach	y in Education Health Water Total{
	scatter topic`y' Arua_dist if ldist>0 & treat==1 || lowess topic`y' Arua_dist if ldist>0 & treat==1,  lw(vthick) title("`y' messaging") ytitle("`y' messages (cluster)") xtitle("Distance to Arua") legend(off)  name(topic`y')
	}

	graph combine topicTotal topicEducation topicHealth topicWater, title("Messaging and distance to district HQs")
				
graph drop _all
set scheme plottig

**********************************************************************	
* FIGURE 24 in Supplementary Information
**********************************************************************

foreach	y in Education Health Water Total{
	scatter topic`y' c_poverty_census if ldist>0 & treat==1 & c_poverty_census< 3|| lowess topic`y' c_poverty_census if ldist>0 & treat==1 & c_poverty_census< 3,  lw(vthick) title("`y' messaging") ytitle("`y' messages (cluster)") xtitle("Wealth") legend(off)  name(topic`y')
	}

	graph combine topicTotal topicEducation topicHealth topicWater, title("Messaging and village wealth")
	
*************************************************************
* modertator
*************************************************************
set scheme lean1 // lean schemes is not installed in Stata by default
gen Distance = Arua_dist
gen Poverty = c_poverty_census
gen Services2 = c_parts_services_14_16
gen Services1 = c_parts_services_13_14
gen Requests2 = c_village_requests_14_16
gen Requests1 = c_village_requests_13_14

set more off
graph drop _all

foreach j in Distance {
	foreach y in Services Requests{
	interflex `y'2 treat `j' `y'1 if Poverty<3, title("Water `y'") xlabel(`j') dlabel(UBridge) ylabel()  
	return list
	mat list r(estBin)
	mat list r(margeff)
	graph save Water`y'`j', replace
		}

*************************************************************
* FIGURE 22 in Supplementary Information
*************************************************************
		
	graph combine WaterServices`j'.gph WaterRequests`j'.gph
		}

foreach j in Poverty {
	foreach y in Services Requests{
	interflex `y'2 treat `j' `y'1 if Poverty<3, title("Water `y'") xlabel(`j') dlabel(UBridge) ylabel()  
	return list
	mat list r(estBin)
	mat list r(margeff)
	graph save Water`y'`j', replace
		}

*************************************************************
* FIGURE 27 in Supplementary Information
*************************************************************
		
	graph combine WaterServices`j'.gph WaterRequests`j'.gph
		}

*************************************************************
* Randomization Inference
*************************************************************

foreach i in parts_services village_requests  {
	regress c_`i'_14_16 treat c_`i'_13_14, cl(cluster_id)
	mat beta=e(b)
	svmat double beta
	ren beta1 beta`i'
	drop beta2-beta3
	su beta`i'
	local beta`i' = r(mean)
	ritest treat _b[ treat ], cluster(cluster_id) reps(5000) seed(05102018) right saving(`i', replace): regress c_`i'_14_16 treat c_`i'_13_14, cl(cluster_id)

	preserve
			use `i', clear
			gen t= `beta`i''
			gen c=0
			replace c=1 if _pm_1 >= t
			egen p = mean(c)
			saveold `i', replace
	restore
	}

****************************************		
preserve 
graph drop _all

************************************
* Figure 5 in Supplementary Information
************************************

	use parts_services.dta, clear
	quietly su t, de
	local t=round(r(mean),.001)
	quietly su p, de
	local p=round(r(mean),.001)
hist _pm_1 , kdensity lcolor() title(water: parts and services) addplot(pci 0 `t' 1.25 `t', lc(red) lp(solid) lw(thick)) text(.5 .7 "pval=`p'", size(small)) text(1 `t' "{&tau}=`t'",place(ne) size(small)) name(parts) legend(off) 

	use village_requests.dta, clear
	quietly su t, de
	local t=round(r(mean),.001)
	quietly su p, de
	local p=round(r(mean),.001)
hist _pm_1 , kdensity lcolor() title(water: village requests) addplot(pci 0 `t' 3.25 `t', lc(red) lp(solid) lw(thick)) text(1.5 .25 "pval=`p'", size(small)) text(3 `t' "{&tau}=`t'",place(ne) size(small)) name(requests) legend(off) 
	
	graph combine parts requests, title("Water analysis (RI)")
		
restore 	
*/		
		
************************************
* Second round of water admin data
************************************
*standarize variables 
foreach var in A2_LC1Request0 A2_LC1Request1 A2_LC1Request2{
	quietly summarize `var' if treat==0
	local `var'_mean= r(mean)
	local `var'_sd= r(sd) 
	gen c_`var' = (`var'-``var'_mean')/``var'_sd'
	qui egen mean_std_`var'=mean(c_`var') if treat==1
	replace c_`var' = mean_std_`var' if treat==1 & c_`var'==.
	replace c_`var' = 0 if treat==0 & c_`var'==.
	}
	
	*Regressions:
		*SHORT-TERM  
		reg c_A2_LC1Request1 i.treat c_A2_LC1Request0, cl(cluster_id)
		estadd local control "no"
		estadd local period "short-term"
		est store c_A2_LC1Request1a
		margins, at(treat=(0 1))
		test _b[1.treat]=0
		local sign_trt = sign(_b[1.treat])
		display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_trt'*sqrt(r(F)))
		
		reg c_A2_LC1Request1 i.treat c_A2_LC1Request0 $controls, cl(cluster_id)
		estadd local control "yes"
		estadd local period "short-term"
		est store c_A2_LC1Request1b
		margins, at(treat=(0 1))	
		test _b[1.treat]=0
		local sign_trt = sign(_b[1.treat])
		display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_trt'*sqrt(r(F)))
			
		* LONG-TERM (standardized)
		reg c_A2_LC1Request2 i.treat c_A2_LC1Request0, cl(cluster_id)
		estadd local control "no"
		estadd local period "long-term"
		est store c_A2_LC1Request2a
		margins, at(treat=(0 1))
		test _b[1.treat]=0
		local sign_trt = sign(_b[1.treat])
		display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_trt'*sqrt(r(F)))
		
		reg c_A2_LC1Request2 i.treat c_A2_LC1Request0 $controls, cl(cluster_id)
		estadd local control "yes"
		estadd local period "long-term"
		est store c_A2_LC1Request2b
		margins, at(treat=(0 1))	
		test _b[1.treat]=0
		local sign_trt = sign(_b[1.treat])
		display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_trt'*sqrt(r(F)))

log close 
clear
exit